Cadences#

Hide imports
import os
from collections import defaultdict, Counter

from git import Repo
import dimcat as dc
import ms3
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from utils import STD_LAYOUT, CADENCE_COLORS, color_background, value_count_df, get_repo_name, print_heading, resolve_dir
Hide source
# Absolute path of the directory two levels up from the current working directory.
# It is printed first and then passed through the project helper resolve_dir().
two_levels_up = os.path.join('..', '..')
CORPUS_PATH = os.path.abspath(two_levels_up)
print_heading("Notebook settings")
print(f"CORPUS_PATH: {CORPUS_PATH!r}")
CORPUS_PATH = resolve_dir(CORPUS_PATH)
Notebook settings
-----------------

CORPUS_PATH: '/home/runner/work/workflow_deployment/romantic_piano_corpus'
Hide source
# Report which data commit and which library versions produced this notebook.
repo = Repo(CORPUS_PATH)
print_heading("Data and software versions")
repo_name = get_repo_name(repo)
short_sha = repo.commit().hexsha[:7]
print(f"Data repo '{repo_name}' @ {short_sha}")
print(f"dimcat version {dc.__version__}")
print(f"ms3 version {ms3.__version__}")
Data and software versions
--------------------------

Data repo 'romantic_piano_corpus' @ 0a9eadd
dimcat version 0.3.0
ms3 version 2.2.1
# Create an empty dimcat Dataset and load everything found under CORPUS_PATH.
# NOTE(review): parse_tsv=False presumably postpones parsing of the TSV files -- confirm in the dimcat docs.
dataset = dc.Dataset()
dataset.load(directory=CORPUS_PATH, parse_tsv=False)
[annotated|all|default]
All corpora
-----------
View: This view is called 'annotated'. It 
	- excludes pieces that are not contained in the metadata,
	- filters out file extensions requiring conversion (such as .xml),
	- excludes review files and folders, and
	- includes only facets containing 'expanded'.

                               has     active expanded       
                          metadata       view detected parsed
corpus                                                       
beethoven_piano_sonatas        yes  annotated       64     64
chopin_mazurkas                yes  annotated       55     55
debussy_suite_bergamasque      yes  annotated        4      4
dvorak_silhouettes             yes  annotated       12     12
grieg_lyric_pieces             yes  annotated       66     66
liszt_pelerinage               yes  annotated       19     19
medtner_tales                  yes  annotated       19     19
schumann_kinderszenen          yes  annotated       13     13
tchaikovsky_seasons            yes  annotated       12     12

11/12 facets are excluded from this view.


There are 1 orphans that could not be attributed to any of the respective corpus's pieces.
N = 264 annotated pieces, 264 parsed dataframes.

Metadata#

# Concatenated metadata of all loaded pieces; abort early if nothing was selected.
all_metadata = dataset.data.metadata()
n_metadata_rows = len(all_metadata)
assert n_metadata_rows > 0, "No pieces selected for analysis."
n_scores = dataset.data.count_pieces()
print(f"Concatenated 'metadata.tsv' files cover {n_metadata_rows} of the {n_scores} scores.")
# Preview: first piece of every corpus, first 20 metadata columns.
first_piece_per_corpus = all_metadata.reset_index(level=1).groupby(level=0).nth(0)
first_piece_per_corpus.iloc[:, :20]
Concatenated 'metadata.tsv' files cover 264 of the 264 scores.
piece TimeSig KeySig last_mc last_mn length_qb last_mc_unfolded last_mn_unfolded length_qb_unfolded volta_mcs all_notes_qb n_onsets n_onset_positions guitar_chord_count form_label_count label_count annotated_key harmony_version annotators reviewers
corpus
beethoven_piano_sonatas 01-1 1: 2/2 1: -4 154 152 608.0 308.0 304.0 1216.0 1476.00 1679 985 0 0 241 f 2.3.0 Lars & Ya-Chuan (2.2.0), John Heilig (2.3.0) AN
chopin_mazurkas BI105-2op30-2 1: 3/4 1: 2 65 64 193.0 65.0 64.0 193.0 711.00 810 274 0 0 116 b 2.3.0 Wendelin Bitzan (1.0.0), Adrian Nagel (2.2.0),... JH, AN, DK
debussy_suite_bergamasque l075-01_suite_prelude 1: 4/4 1: -1 89 89 356.0 89.0 89.0 356.0 1533.67 1721 870 0 0 274 F 2.3.0 Adrian Nagel (2.1.1), Amelia Brey (2.3.0) AB, AN
dvorak_silhouettes op08n01 1: 6/8 1: 4, 7: -5, 49: 4 54 52 156.5 54.0 52.0 156.5 658.75 957 288 0 0 80 c# 2.3.0 Daniel Grote (2.1.1), Hanné Becker (2.3.0) Johannes Hentschel (2.1.1), AN
grieg_lyric_pieces op12n01 1: 2/4 1: -3 23 23 46.0 23.0 23.0 46.0 135.50 268 156 0 0 43 Eb 2.3.0 Adrian Nagel (2.1.1), John Heilig (2.30) Adrian Nagel
liszt_pelerinage 160.01_Chapelle_de_Guillaume_Tell 1: 4/4 1: 0 97 97 388.0 97.0 97.0 388.0 1902.42 2879 1069 0 0 174 C 2.3.0 Adrian Nagel (2.1.1), Amelia Brey (2.3.0) Johannes Hentschel (1-33 & 82-97), AB, AN
medtner_tales op08n01 1: 4/8 1: -3 81 81 162.0 81.0 81.0 162.0 603.00 1481 528 0 0 213 c 2.3.0 Wendelin Bitzan (2.2.0), John Heilig (2.3.0) Adrian Nagel, DK
schumann_kinderszenen n01 1: 2/4 1: 1 22 22 44.0 44.0 44.0 88.0 134.33 241 141 0 0 44 G 2.3.0 Tal Soker (2.1.1), John Heilig (2.3.0) AN, JHei, JH
tchaikovsky_seasons op37a01 1: 3/4 1: 3, 29: 1, 63: 3 103 103 309.0 103.0 103.0 309.0 1058.17 1537 829 0 0 313 A 2.3.0 Adrian Nagel (2.1.1), John Heilig (2.3.0) Johannes Hentschel, AN

All annotation labels from the selected pieces#

# Harmony labels ('expanded' facet) of every loaded piece.
all_labels = dataset.data.get_facet('expanded')

n_labels = len(all_labels.index)
print(f"{n_labels} hand-annotated harmony labels:")
# Show the first 20 rows with the 'chord' column styled by color_background.
label_preview = all_labels.iloc[:20]
label_preview.style.apply(color_background, subset="chord")
57455 hand-annotated harmony labels:
      mc mn quarterbeats quarterbeats_all_endings duration_qb mc_onset mn_onset timesig staff voice label alt_label globalkey localkey pedal chord special numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta pedalend
corpus piece i                                                                  
beethoven_piano_sonatas 01-1 0 1 0 0 0 9.000000 0 3/4 2/2 2 1 f.i{ nan f i nan i nan i nan nan nan nan nan { m True True (0, -3, 1) () 0 0 nan
1 4 3 9 9 8.000000 0 0 2/2 2 1 V65 nan f i nan V65 nan V nan 65 nan nan nan nan Mm7 True True (5, 2, -1, 1) () 1 5 nan
2 6 5 17 17 4.000000 0 0 2/2 2 1 i nan f i nan i nan i nan nan nan nan nan nan m True True (0, -3, 1) () 0 0 nan
3 7 6 21 21 4.000000 0 0 2/2 2 1 #viio6 nan f i nan #viio6 nan #vii o 6 nan nan nan nan o True True (2, -1, 5) () 5 2 nan
4 8 7 25 25 2.000000 0 0 2/2 2 1 i6 nan f i nan i6 nan i nan 6 nan nan nan nan m True True (-3, 1, 0) () 0 -3 nan
5 8 7 27 27 2.000000 1/2 1/2 2/2 2 1 iio6 nan f i nan iio6 nan ii o 6 nan nan nan nan o True True (-1, -4, 2) () 2 -1 nan
6 9 8 29 29 1.000000 0 0 2/2 2 1 V(4)} nan f i nan V(4) nan V nan nan 4 nan nan } M True True (1, 0, 2) () 1 1 nan
7 9 8 30 30 2.000000 1/4 1/4 2/2 2 1 V|HC nan f i nan V nan V nan nan nan nan HC nan M True True (1, 5, 2) () 1 1 nan
8 9 8 32 32 9.000000 3/4 3/4 2/2 2 1 v{ nan f i nan v nan v nan nan nan nan nan { m True True (1, -2, 2) () 1 1 nan
9 12 11 41 41 4.000000 0 0 2/2 2 1 III.IVM2 ii7(2) f III nan IVM2 nan IV M 2 nan nan nan nan MM7 True False (4, -1, 3, 0) () -1 4 nan
10 13 12 45 45 4.000000 0 0 2/2 2 1 ii7 nan f III nan ii7 nan ii nan 7 nan nan nan nan mm7 True False (2, -1, 3, 0) () 2 2 nan
11 14 13 49 49 4.000000 0 0 2/2 2 1 V43 nan f III nan V43 nan V nan 43 nan nan nan nan Mm7 True False (2, -1, 1, 5) () 1 2 nan
12 15 14 53 53 4.000000 0 0 2/2 2 1 I nan f III nan I nan I nan nan nan nan nan nan M True False (0, 4, 1) () 0 0 nan
13 16 15 57 57 1.000000 0 0 2/2 2 1 ii6(2) nan f III nan ii6(2) nan ii nan 6 2 nan nan nan m True False (-1, 3, 4) () 2 -1 nan
14 16 15 58 58 1.000000 1/4 1/4 2/2 2 1 ii6 nan f III nan ii6 nan ii nan 6 nan nan nan nan m True False (-1, 3, 2) () 2 -1 nan
15 16 15 59 59 2.000000 1/2 1/2 2/2 2 1 V65/V nan f III nan V65/V nan V nan 65 nan V nan nan Mm7 True False (6, 3, 0, 2) () 2 6 nan
16 17 16 61 61 3.000000 0 0 2/2 2 1 V|HC} nan f III nan V nan V nan nan nan nan HC } M True False (1, 5, 2) () 1 1 nan
17 17 16 64 64 1.000000 3/4 3/4 2/2 2 1 I6{ nan f III nan I6 nan I nan 6 nan nan nan { M True False (4, 1, 0) () 0 4 nan
18 18 17 65 65 1.000000 0 0 2/2 2 1 ii6(2) nan f III nan ii6(2) nan ii nan 6 2 nan nan nan m True False (-1, 3, 4) () 2 -1 nan
19 18 17 66 66 1.000000 1/4 1/4 2/2 2 1 ii6 nan f III nan ii6 nan ii nan 6 nan nan nan nan m True False (-1, 3, 2) () 2 -1 nan

Filtering out pieces without cadence annotations#

# Keep only the pieces that contain cadence annotations and make sure some are left.
cadence_filter = dc.HasCadenceAnnotationsFilter()
hascadence = cadence_filter.process_data(dataset)
assert () in hascadence.indices and len(hascadence.indices[()]) > 0, "No cadences found."
n_before = len(dataset.indices[()])
n_after = len(hascadence.indices[()])
print(f"Before: {n_before} pieces; after removing those without cadence labels: {n_after}")
Before: 264 pieces; after removing those without cadence labels: 258

Show corpora containing pieces with cadence annotations#

# Group the filtered pieces by corpus and summarise how many pieces each group holds.
grouped_by_corpus = dc.CorpusGrouper().process_data(hascadence)
corpora = {
    group[0]: f"{len(ixs)} pieces"
    for group, ixs in grouped_by_corpus.indices.items()
}
n_pieces_total = sum(len(ixs) for ixs in grouped_by_corpus.indices.values())
print(f"{len(corpora)} corpora with {n_pieces_total} pieces containing cadence annotations:")
corpora
9 corpora with 258 pieces containing cadence annotations:
{'beethoven_piano_sonatas': '64 pieces',
 'chopin_mazurkas': '50 pieces',
 'debussy_suite_bergamasque': '4 pieces',
 'dvorak_silhouettes': '12 pieces',
 'grieg_lyric_pieces': '65 pieces',
 'liszt_pelerinage': '19 pieces',
 'medtner_tales': '19 pieces',
 'schumann_kinderszenen': '13 pieces',
 'tchaikovsky_seasons': '12 pieces'}

All annotation labels from the selected pieces#

# Re-fetch the harmony labels, this time only for pieces with cadence annotations.
all_labels = hascadence.get_facet('expanded')

n_labels = len(all_labels.index)
print(f"{n_labels} hand-annotated harmony labels:")
# Preview the first 10 rows from column position 13 onwards, 'chord' column styled.
label_preview = all_labels.iloc[:10, 13:]
label_preview.style.apply(color_background, subset="chord")
56656 hand-annotated harmony labels:
      localkey pedal chord special numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta pedalend
corpus fname interval                                        
beethoven_piano_sonatas 01-1 [0.0, 9.0) i nan i nan i nan nan nan nan nan { m True True (0, -3, 1) () 0 0 nan
[9.0, 17.0) i nan V65 nan V nan 65 nan nan nan nan Mm7 True True (5, 2, -1, 1) () 1 5 nan
[17.0, 21.0) i nan i nan i nan nan nan nan nan nan m True True (0, -3, 1) () 0 0 nan
[21.0, 25.0) i nan #viio6 nan #vii o 6 nan nan nan nan o True True (2, -1, 5) () 5 2 nan
[25.0, 27.0) i nan i6 nan i nan 6 nan nan nan nan m True True (-3, 1, 0) () 0 -3 nan
[27.0, 29.0) i nan iio6 nan ii o 6 nan nan nan nan o True True (-1, -4, 2) () 2 -1 nan
[29.0, 30.0) i nan V(4) nan V nan nan 4 nan nan } M True True (1, 0, 2) () 1 1 nan
[30.0, 32.0) i nan V nan V nan nan nan nan HC nan M True True (1, 5, 2) () 1 1 nan
[32.0, 41.0) i nan v nan v nan nan nan nan nan { m True True (1, -2, 2) () 1 1 nan
[41.0, 45.0) III nan IVM2 nan IV M 2 nan nan nan nan MM7 True False (4, -1, 3, 0) () -1 4 nan

Metadata#

# Metadata restricted to the pieces that passed the cadence filter.
dataset_metadata = hascadence.data.metadata()
selected_pieces = hascadence.indices[()]
hascadence_metadata = dataset_metadata.loc[selected_pieces]
# The first index level is renamed to 'dataset' in place.
hascadence_metadata.index.rename('dataset', level=0, inplace=True)
hascadence_metadata.head()
TimeSig KeySig last_mc last_mn length_qb last_mc_unfolded last_mn_unfolded length_qb_unfolded volta_mcs all_notes_qb ... typesetter text pdf score integrity comments staff_3_ambitus staff_3_instrument PDF staff_4_ambitus staff_4_instrument
dataset piece
beethoven_piano_sonatas 01-1 1: 2/2 1: -4 154 152 608.0 308.0 304.0 1216.0 1476.00 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
01-2 1: 3/4 1: -1 62 61 183.0 124.0 122.0 366.0 526.17 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
01-3 1: 3/4 1: -4, 43: -1 77 73 219.0 196.0 186.0 558.0 565.50 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
01-4 1: 2/2 1: -4 199 196 790.0 392.0 390.0 1560.0 [[[57, 58], [59, 60, 61]]] 2326.83 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
02-1 1: 2/4 1: 3, 127: 0, 230: 3 342 336 679.5 672.0 664.0 1336.0 [[[115, 116, 117, 118], [119, 120, 121, 122, 1... 1695.75 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 69 columns

# Mean 'composed_end' year per dataset (truncated to int), sorted ascending;
# the resulting index order is reused as chronological ordering of the datasets.
composed_end_by_dataset = hascadence_metadata.groupby(level=0).composed_end
mean_composition_years = composed_end_by_dataset.mean().astype(int).sort_values()
chronological_order = mean_composition_years.index.to_list()

# One bar per dataset: x = mean year, y = number of pieces.
year_column = mean_composition_years.rename('year')
pieces_column = hascadence_metadata.groupby(level='dataset').size().rename('pieces')
bar_data = pd.concat([year_column, pieces_column], axis=1).reset_index()
fig = px.bar(
    bar_data,
    x='year',
    y='pieces',
    color='dataset',
    title='Pieces contained in the dataset',
)
fig.update_traces(width=5)

Overall#

  • PAC: Perfect Authentic Cadence

  • IAC: Imperfect Authentic Cadence

  • HC: Half Cadence

  • DC: Deceptive Cadence

  • EC: Evaded Cadence

  • PC: Plagal Cadence

# Number of rows carrying a cadence label, followed by the distribution of cadence types.
n_cadence_labels = all_labels.cadence.notna().sum()
print(f"{n_cadence_labels} cadence labels.")
value_count_df(all_labels.cadence)
3024 cadence labels.
counts %
cadence
PAC 1311 0.433532
HC 922 0.304894
IAC 648 0.214286
PC 61 0.020172
EC 51 0.016865
DC 31 0.010251
# Pie chart of cadence types over all rows that carry a cadence label, colored via CADENCE_COLORS.
px.pie(all_labels[all_labels.cadence.notna()], names="cadence", color="cadence", color_discrete_map=CADENCE_COLORS)

Per dataset#

# Absolute counts and per-corpus fractions of each cadence type.
cadence_count_per_dataset = all_labels.groupby("corpus").cadence.value_counts()
cadence_fraction_per_dataset = cadence_count_per_dataset / cadence_count_per_dataset.groupby(level=0).sum()
# category_orders is keyed by column name. The x-axis column of this frame is 'corpus'
# (it has no 'dataset' column), so the chronological order has to be passed under that
# key; otherwise plotly silently ignores it.
# Note: the 'count' column holds fractions here, not absolute counts.
px.bar(cadence_fraction_per_dataset.rename('count').reset_index(), x='corpus', y='count', color='cadence',
      color_discrete_map=CADENCE_COLORS, category_orders=dict(corpus=chronological_order))
# One pie of absolute cadence counts per corpus, four facets per row.
pie_data = cadence_count_per_dataset.rename('count').reset_index()
fig = px.pie(
    pie_data,
    names='cadence',
    color='cadence',
    values='count',
    facet_col='corpus',
    facet_col_wrap=4,
    height=2000,
    color_discrete_map=CADENCE_COLORS,
)
# Facet titles look like "corpus=<name>"; keep only the part after the "=".
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(**STD_LAYOUT)

Per phrase#

Number of cadences per phrase#

# Slice the corpus-grouped data into phrases and fetch the sliced 'expanded' labels.
segmented = dc.PhraseSlicer().process_data(grouped_by_corpus)
phrases = segmented.get_slice_info()
phrase_segments = segmented.get_facet("expanded")
# One group per combination of the first three index levels
# (presumably corpus, piece and phrase interval -- TODO confirm against dimcat).
phrase_gpb = phrase_segments.groupby(level=[0,1,2])
# Distinct local keys of every phrase (as tuples) and how many there are.
local_keys_per_phrase = phrase_gpb.localkey.unique().map(tuple)
n_local_keys_per_phrase = local_keys_per_phrase.map(len)
phrases_with_keys = pd.concat([n_local_keys_per_phrase.rename('n_local_keys'),
                               local_keys_per_phrase.rename('local_keys'),
                               phrases], axis=1)
# NOTE(review): 'n_cadences' is computed with nunique(), i.e. it counts DISTINCT cadence
# types per phrase; a phrase containing the same cadence type twice is counted as 1.
# 'cadences' holds the distinct types in order of first appearance, missing values removed.
phrases_with_cadences = pd.concat([
    phrase_gpb.cadence.nunique().rename('n_cadences'),
    phrase_gpb.cadence.unique().rename('cadences').map(lambda l: tuple(e for e in l if not pd.isnull(e))),
    phrases_with_keys
], axis=1)
# Distribution of n_cadences over all phrases.
value_count_df(phrases_with_cadences.n_cadences, counts="#phrases")
#phrases %
n_cadences
1 2839 0.796130
0 661 0.185362
2 65 0.018228
3 1 0.000280
# Per corpus: how many phrases contain 0, 1, 2, ... distinct cadence types.
n_cad = phrases_with_cadences.groupby(level='corpus').n_cadences.value_counts().rename('counts').reset_index().sort_values('n_cadences')
# Cast to str so that plotly treats n_cadences as a discrete color category.
n_cad.n_cadences = n_cad.n_cadences.astype(str)
# category_orders is keyed by column name; the x-axis column is 'corpus', so the
# chronological order must be given under that key (a 'dataset' key matches no column
# of n_cad and would be ignored).
fig = px.bar(n_cad, x='corpus', y='counts', color='n_cadences', height=800, barmode='group',
             labels=dict(n_cadences="#cadences in a phrase"),
             category_orders=dict(corpus=chronological_order)
      )
fig.show()

Combinations of cadence types for phrases with more than one cadence#

# Tabulate the cadence-type tuples of all phrases whose n_cadences exceeds 1.
value_count_df(phrases_with_cadences[phrases_with_cadences.n_cadences > 1].cadences)
counts %
cadences
(HC, PAC) 24 0.363636
(EC, PAC) 9 0.136364
(PAC, HC) 6 0.090909
(EC, HC) 6 0.090909
(DC, PAC) 5 0.075758
(IAC, PAC) 4 0.060606
(EC, IAC) 3 0.045455
(PC, PAC) 2 0.030303
(DC, IAC) 1 0.015152
(IAC, EC) 1 0.015152
(HC, DC) 1 0.015152
(DC, HC) 1 0.015152
(EC, DC, PAC) 1 0.015152
(HC, PC) 1 0.015152
(IAC, HC) 1 0.015152

Positioning of cadences within phrases#

# Collect one scatter point per cadence label plus one "end of phrase" marker per phrase.
# Each row is (phrase_ix, x, marker, description).
df_rows = []
y_position = 0
# Visit phrases with at least one cadence type, ordered by their 'duration_qb' value.
for ix in phrases_with_cadences[phrases_with_cadences.n_cadences > 0].sort_values('duration_qb').index:
    df = phrase_segments.loc[ix]
    description = str(ix)
    if df.cadence.notna().any():
        # The third index element is used as an interval object (.length, .left).
        interval = ix[2]
        df_rows.append((y_position, interval.length, "end of phrase", description))
        start_pos = interval.left
        cadences = df.loc[df.cadence.notna(), ['quarterbeats', 'cadence']]
        # Express cadence positions relative to the left bound of the phrase interval.
        cadences.quarterbeats -= start_pos
        for cadence_x, cadence_type in cadences.itertuples(index=False, name=None):
            df_rows.append((y_position, cadence_x, cadence_type, description))
        # Only phrases that contributed points occupy a row in the plot.
        y_position += 1
    #else:
    #    df_rows.append((y_position, pd.NA, pd.NA, description))
    
data = pd.DataFrame(df_rows, columns=["phrase_ix", "x", "marker", "description"])
# One horizontal line of markers per phrase; the y axis is reversed so phrase 0 is on top.
fig = px.scatter(data[data.x.notna()], x='x', y="phrase_ix", color="marker", hover_name="description", height=3000,
                labels=dict(marker='legend'), color_discrete_map=CADENCE_COLORS)
fig.update_traces(marker_size=5)
fig.update_yaxes(autorange="reversed")
fig.show()

Cadence ultima#

# Cadence labels that come without a chord have no chord features yet. Expand exactly
# those rows with ms3 and write the result back into phrase_segments.
phrase_segments = segmented.get_facet("expanded")
cadence_selector = phrase_segments.cadence.notna()
missing_chord_selector = phrase_segments.chord.isna()
cadence_with_missing_chord_selector = cadence_selector & missing_chord_selector
missing = phrase_segments[cadence_with_missing_chord_selector]
expanded = ms3.expand_dcml.expand_labels(
    missing,
    propagate=False,
    chord_tones=True,
    skip_checks=True,
)
phrase_segments.loc[cadence_with_missing_chord_selector] = expanded
# Cadence rows that still lack a bass note after the expansion.
still_missing = phrase_segments.cadence.notna() & phrase_segments.bass_note.isna()
print(f"Ultima harmony missing for {still_missing.sum()} cadence labels.")
Ultima harmony missing for 9 cadence labels.

Ultimae as Roman numeral#

def highlight(row, color="#ffffb3"):
    """Row-wise style function for ``DataFrame.style.apply(highlight, axis=1)``.

    Rows whose ``counts`` value is at least 10 get the given background color in
    every cell; all other rows stay unstyled.

    Parameters
    ----------
    row : pandas.Series
        One row of the styled frame; must expose a ``counts`` entry.
    color : str
        CSS color used as background for highlighted rows.

    Returns
    -------
    list
        One entry per cell of ``row``: a CSS declaration, or None for no styling.
    """
    n_cells = len(row)
    if row.counts < 10:
        return [None] * n_cells
    # f-string is required here: without it the literal text "{color}" ends up in the CSS.
    return [f"background-color: {color};"] * n_cells

# Overall cadence frequencies; their index order (most frequent first) orders the facet rows.
cadence_counts = all_labels.cadence.value_counts()
# Count the numerals of cadence rows, separately for major/minor local keys and cadence type.
numeral_counts = phrase_segments.groupby(['localkey_is_minor', 'cadence']).numeral.value_counts()
ultima_root = numeral_counts.rename('counts').to_frame().reset_index()
mode_names = {False: 'in major', True: 'in minor'}
ultima_root.localkey_is_minor = ultima_root.localkey_is_minor.map(mode_names)
#ultima_root.style.apply(highlight, axis=1)
fig = px.pie(
    ultima_root,
    names='numeral',
    values='counts',
    facet_row='cadence',
    facet_col='localkey_is_minor',
    height=1500,
    category_orders={'cadence': cadence_counts.index},
)
# Facet titles look like "column=value"; keep only the value.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(**STD_LAYOUT)
fig.show()
#phrase_segments.groupby(level=[0,1,2], group_keys=False).apply(lambda df: df if ((df.cadence == 'PAC') & (df.numeral == 'V')).any() else None)

Ultimae bass note as scale degree#

# Count the bass notes of cadence rows per mode and cadence type.
bass_note_counts = phrase_segments.groupby(['localkey_is_minor', 'cadence']).bass_note.value_counts()
ultima_bass = bass_note_counts.rename('counts').reset_index()
# Convert the bass notes to scale degrees via ms3.fifths2sd, taking the mode column into account.
fifths2sd_columns = dict(fifths='bass_note', minor='localkey_is_minor')
ultima_bass.bass_note = ms3.transform(ultima_bass, ms3.fifths2sd, fifths2sd_columns)
mode_names = {False: 'in major', True: 'in minor'}
ultima_bass.localkey_is_minor = ultima_bass.localkey_is_minor.map(mode_names)
#ultima_bass.style.apply(highlight, axis=1)
fig = px.pie(
    ultima_bass,
    names='bass_note',
    values='counts',
    facet_row='cadence',
    facet_col='localkey_is_minor',
    height=1500,
    category_orders={'cadence': cadence_counts.index},
)
# Facet titles look like "column=value"; keep only the value.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(**STD_LAYOUT)
fig.show()

Chord progressions#

PACs with ultima I/i#

def remove_immediate_duplicates(l):
    """Collapse runs of equal neighbours into a single element.

    Parameters
    ----------
    l : iterable
        Any iterable (tuple, list, Series, ...); it is not modified.

    Returns
    -------
    tuple
        The elements of ``l`` in order, skipping each element that equals its
        immediate predecessor.
    """
    # A private sentinel (rather than None) marks "no predecessor yet", so that a
    # leading None in the input is kept instead of being mistaken for a duplicate.
    no_predecessor = object()
    previous = no_predecessor
    kept = []
    for element in l:
        if previous is no_predecessor or element != previous:
            kept.append(element)
        previous = element
    return tuple(kept)

def get_progressions(selected='PAC', last_row=None, feature='chord', dataset=None, as_series=True, remove_duplicates=False):
    """Collect the progressions of ``feature`` values that lead up to cadences of one type.

    Reads the module-level variable ``phrase_segments``.

    Parameters
    ----------
    selected : str
        Cadence type to collect, e.g. 'PAC'.
    last_row : dict, optional
        Conditions on the row carrying the cadence label: maps a column name to one
        admissible value or a tuple of admissible values. Cadences whose last row does
        not satisfy all conditions are skipped.
    feature : str
        Column whose values make up the progressions; rows where it is missing are dropped.
    dataset : str, optional
        If given, only groups whose first index element contains this string are used.
    as_series : bool
        Return a pandas Series (dtype object) instead of a list.
    remove_duplicates : bool
        Collapse immediate repetitions within each progression.

    Returns
    -------
    pandas.Series or list
        One tuple of ``feature`` values per matching cadence.
    """
    if last_row is None:
        last_row = {}
    conditions = {col: val if isinstance(val, tuple) else (val,) for col, val in last_row.items()}
    progressions = []

    rows_with_feature = phrase_segments[phrase_segments[feature].notna()]
    for (corp, fname, *_), df in rows_with_feature.groupby(level=[0,1,2]):
        if dataset is not None and dataset not in corp:
            continue
        if not (df.cadence == selected).fillna(False).any():
            continue
        # remove chords after the last cadence label
        df = df[df.cadence.bfill().notna()]
        # group segments leading up to a cadence label: a new group starts on the row
        # following each cadence label
        cadence_groups = df.cadence.notna().shift(fill_value=False).cumsum()
        for _, segment in df.groupby(cadence_groups):
            final_row = segment.iloc[-1]
            if final_row.cadence != selected:
                continue
            if any(final_row[col] not in allowed for col, allowed in conditions.items()):
                continue
            progression = tuple(segment[feature])
            if remove_duplicates:
                # pass a tuple: remove_immediate_duplicates concatenates its argument
                # with a tuple and would fail on a list
                progression = remove_immediate_duplicates(progression)
            progressions.append(progression)
    if as_series:
        return pd.Series(progressions, dtype='object')
    return progressions
# Chord progressions leading to PACs whose final numeral is I or i.
chord_progressions = get_progressions(
    selected='PAC',
    last_row=dict(numeral=('I', 'i')),
    feature='chord',
)
n_chord_progressions = len(chord_progressions)
print(f"Progressions for {n_chord_progressions} cadences:")
value_count_df(chord_progressions, "chord progressions")
Progressions for 1293 cadences:
counts %
chord progressions
(V, V7, I, ii6(2), ii6, V7, I) 12 0.009281
(I, V7, I) 11 0.008507
(i, VM7, i, V7, V7(#2), V7, i) 8 0.006187
(I, V43, I, V6, I, V43, I, V7, I) 6 0.004640
(i6, i, #viio6, VI6, V6, vo6, IV6, #viio2, V(6), #viio64, i) 6 0.004640
... ... ...
(V7, I6, viio6, I, ii6, I6, ii, viio, I, viio6, I6, ii6, viio7/V, V, I6, viio6, I, ii6, I6, ii, viio, I, viio6, I6, ii6, V(64), V7, I) 1 0.000773
(I, V43, V2, I6, I, V(64)/V, V7/V, V, V2, I6, I, V6, V, I, ii6, V, I, V43, V2, I6, I, V(64)/V, V7/V, V, V2, I6, I, V6, I, ii6, V7(9), V7, I) 1 0.000773
(V, V2, I6, V43, I, ii6, V(64), V7, I) 1 0.000773
(I, V7, I, V7, I, V7, I, V7, I, V7, I) 1 0.000773
(V65(b9)/ii, ii, V65(b9), I, ii6, viio7/V, V7(9), V7, I(9), I) 1 0.000773

996 rows × 2 columns

# Same selection of PACs, but expressed as numerals only.
numeral_progressions = get_progressions(
    selected='PAC',
    last_row=dict(numeral=('I', 'i')),
    feature='numeral',
)
value_count_df(numeral_progressions, "numeral progressions")
counts %
numeral progressions
(I, V, V, I) 15 0.011601
(I, V, I) 13 0.010054
(V, V, I, ii, ii, V, I) 12 0.009281
(I, V, I, V, I, V, I, V, I) 9 0.006961
(i, V, i) 8 0.006187
... ... ...
(I, I, V, I, V, V, I, V, I, IV, V, V, I) 1 0.000773
(I, V, I, V, I, I, V, V, V, I, V, I, V, I, ii, ii, vii, I, ii, ii, vii, I, ii, ii, V, V, I) 1 0.000773
(I, V, I, V, I, V, #vii, ii, V, V, I) 1 0.000773
(V, V, V, V, V, V, V, V, V, #vii, vi, ii, V, V, I) 1 0.000773
(V, ii, V, I, ii, vii, V, V, I, I) 1 0.000773

924 rows × 2 columns

# Collapse immediate repetitions of the same numeral within each progression.
numeral_prog_no_dups = numeral_progressions.map(
    remove_immediate_duplicates
)
value_count_df(numeral_prog_no_dups)
counts %
(I, V, I) 43 0.033256
(i, V, i, V, i) 19 0.014695
(I, V, I, V, I) 16 0.012374
(I, V, I, V, I, V, I, V, I) 13 0.010054
(V, I, ii, V, I) 12 0.009281
... ... ...
(I, V, iv, ii, V, I, V, iv, ii, V, i) 1 0.000773
(i, iv, bII, V, I, IV, ii, V, I) 1 0.000773
(I, IV, ii, V, vi, I, IV, ii, V, I) 1 0.000773
(I, V, IV, V, IV, iv, V, iv, V, iv, V, iv, V, i, V, bII, V, bIII, vii, V, I) 1 0.000773
(V, ii, V, I, ii, vii, V, I) 1 0.000773

805 rows × 2 columns

PACs ending on scale degree 1#

Scale degrees expressed w.r.t. major scale, regardless of actual key.

# Bass-note progressions of PACs whose final bass_note is 0, converted with ms3.fifths2sd.
bass_progressions = get_progressions(
    selected='PAC',
    last_row=dict(bass_note=0),
    feature='bass_note',
)
bass_prog = bass_progressions.map(ms3.fifths2sd)
n_bass_progressions = len(bass_progressions)
print(f"Progressions for {n_bass_progressions} cadences:")
value_count_df(bass_prog, "bass progressions")
Progressions for 1191 cadences:
counts %
bass progressions
(1, 5, 1) 20 0.016793
(5, 5, 1, 4, 4, 5, 1) 12 0.010076
(1, 5, 1, 5, 5, 5, 1) 8 0.006717
(1, 5, 5, 1) 8 0.006717
(1, 4, 1, 4, 5, 5, 1) 6 0.005038
... ... ...
(1, 5, 1, 5, 1, 5, 1, 5, 1, 5, 1) 1 0.000840
(1, 6, 4, 5, 3, 4, 5, 5, 1) 1 0.000840
(3, 4, 3, 7, 1, 1, 5, 4, 3, 2, 1, 7, 1, 4, 4, 3, 4, 4, 3, 4, 4, 5, 5, 1) 1 0.000840
(1, 2, 1, 7, 1, 1, 4, 5, 5, 1) 1 0.000840
(#1, 2, 7, 1, 4, #4, 5, 5, 1, 1) 1 0.000840

846 rows × 2 columns

# Collapse immediate repetitions of the same scale degree within each progression.
bass_prog_no_dups = bass_prog.map(
    remove_immediate_duplicates
)
value_count_df(bass_prog_no_dups)
counts %
(1, 5, 1) 34 0.028547
(1, 5, 1, 5, 1) 29 0.024349
(5, 1, 5, 1) 16 0.013434
(5, 1, 4, 5, 1) 14 0.011755
(5, 1) 12 0.010076
... ... ...
(3, 4, 3, 7, 1, 5, 4, 3, 2, 1, 7, 1, 4, 3, 4, 3, 4, 5, 1) 1 0.000840
(1, 2, 1, 7, 1, 4, 5, 1) 1 0.000840
(1, 4, 1, 4, 1, 4, 1, 4, 1) 1 0.000840
(6, 2, 5, 1) 1 0.000840
(#1, 2, 7, 1, 4, #4, 5, 1) 1 0.000840

768 rows × 2 columns

def make_sankey(data, labels, node_pos=None, margin=None, pad=20, color='auto', **kwargs):
    """Build a plotly Sankey figure from an edge table and a list of node labels.

    Parameters
    ----------
    data : object with ``source``, ``target`` and ``value`` attributes
        Edge definition (e.g. a DataFrame with these columns); passed on as link data.
    labels : sequence
        Node labels; position ``i`` labels node ``i``.
    node_pos : dict, optional
        Maps node index to an ``(x, y)`` pair. Nodes missing from the mapping get 0
        for both coordinates. If None, no positions are passed.
    margin : dict, optional
        Layout margins. Defaults to 10 on each of the four sides.
    pad : int
        Node padding.
    color : 'auto' or a node color specification
        With 'auto', every distinct label gets its own hue; equal labels share a color.
    **kwargs
        Forwarded to ``fig.update_layout``.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    if margin is None:
        # fresh dict per call instead of a shared mutable default
        margin = {'l': 10, 'r': 10, 'b': 10, 't': 10}
    if isinstance(color, str) and color == 'auto':
        # dict.fromkeys de-duplicates while keeping first-appearance order, so the
        # label -> color assignment is reproducible (iterating a set of strings is not).
        unique_labels = list(dict.fromkeys(labels))
        # guard against an empty label list, which would otherwise divide by zero
        color_step = 100 / len(unique_labels) if unique_labels else 0
        unique_colors = {label: f'hsv({round(i*color_step)}%,100%,100%)' for i, label in enumerate(unique_labels)}
        color = [unique_colors[label] for label in labels]

    def coordinates(axis):
        """Return the per-node coordinate list for axis 0 (x) or 1 (y), or None."""
        if node_pos is None:
            return None
        return [node_pos[i][axis] if i in node_pos else 0 for i in range(len(labels))]

    fig = go.Figure(go.Sankey(
        arrangement = 'snap',
        node = dict(
          pad = pad,
          #thickness = 20,
          #line = dict(color = "black", width = 0.5),
          label = labels,
          x = coordinates(0),
          y = coordinates(1),
          color = color,
          ),
        link = dict(
          source = data.source,
          target = data.target,
          value = data.value
          ),
        ),
     )

    fig.update_layout(margin=margin, **kwargs)
    return fig

def progressions2graph_data(progressions, cut_at_stage=None):
    """Turn progressions into nodes and weighted edges of a stage-wise graph.

    Every progression is read backwards: its last element is stage 0, the one before
    it stage 1, and so on. Equal elements at the same stage share one node.

    Parameters
    ----------
    progressions : iterable of sequences
        Each sequence must support ``reversed`` and contain hashable elements.
    cut_at_stage : int, optional
        Highest stage to include; stages ``0..cut_at_stage`` are kept (inclusive).
        None means no limit.

    Returns
    -------
    stage_nodes : defaultdict(dict)
        ``{stage: {element: node_id}}`` with node ids numbered in order of creation.
    edge_weights : Counter
        ``{(node_id, following_node_id): count}``; edges point from the earlier
        element towards the later one, i.e. towards stage 0.
    """
    stage_nodes = defaultdict(dict)
    edge_weights = Counter()
    node_counter = 0
    for progression in progressions:
        previous_node = None
        for stage, current in enumerate(reversed(progression)):
            # compare against None explicitly so that cut_at_stage=0 is honoured
            if cut_at_stage is not None and stage > cut_at_stage:
                break
            nodes_at_stage = stage_nodes[stage]
            current_node = nodes_at_stage.get(current)
            if current_node is None:
                current_node = nodes_at_stage[current] = node_counter
                node_counter += 1
            if previous_node is not None:
                edge_weights[(current_node, previous_node)] += 1
            previous_node = current_node
    return stage_nodes, edge_weights

def graph_data2sankey(stage_nodes, edge_weights):
    """Convert stage-wise nodes and weighted edges into a Sankey figure.

    ``edge_weights`` maps ``(source, target)`` node ids to a weight and becomes the
    link table; ``stage_nodes`` maps stage -> {label: node id} and supplies the node
    labels, ordered by node id. Node ids are expected to be 0..n-1 without gaps.
    """
    edge_rows = [
        (source, target, weight)
        for (source, target), weight in edge_weights.items()
    ]
    data = pd.DataFrame(edge_rows, columns = ['source', 'target', 'value'])
    node2label = {}
    for nodes in stage_nodes.values():
        for label, node in nodes.items():
            node2label[node] = label
    labels = [node2label[node] for node in range(len(node2label))]
    return make_sankey(data, labels)

def plot_progressions(progressions, cut_at_stage=None):
    """Plot progressions as a Sankey diagram.

    Builds the graph data with ``progressions2graph_data`` (forwarding
    ``cut_at_stage``) and renders it with ``graph_data2sankey``.
    """
    graph_data = progressions2graph_data(progressions, cut_at_stage=cut_at_stage)
    return graph_data2sankey(*graph_data)

Chordal roots for the 3 last stages#

# Sankey diagram of the de-duplicated numeral progressions leading to PACs.
plot_progressions(numeral_prog_no_dups, cut_at_stage=3)

Complete chords for the last four stages in major#

# Chord progressions of PACs whose final row has numeral 'I' and localkey_is_minor == False.
pac_major = get_progressions('PAC', dict(numeral='I', localkey_is_minor=False), 'chord')
plot_progressions(pac_major, cut_at_stage=4)

Bass degrees for the last 7 stages#

# Sankey diagram of the de-duplicated bass scale-degree progressions leading to PACs.
plot_progressions(bass_prog_no_dups, cut_at_stage=7)

Bass degrees without accidentals#

def remove_sd_accidentals(t):
    """Reduce every scale-degree string in ``t`` to its last character.

    For strings such as '#4' or 'b7' this strips the leading accidental(s).
    Returns a tuple of the same length as ``t``.
    """
    return tuple(scale_degree[-1] for scale_degree in t)
                  
# Strip accidentals first, then collapse the immediate repetitions this may create.
bass_prog_no_acc = bass_prog.map(remove_sd_accidentals)
bass_prog_no_acc_no_dup = bass_prog_no_acc.map(remove_immediate_duplicates)
plot_progressions(bass_prog_no_acc_no_dup, cut_at_stage=7)

HCs ending on V#

# Bass-note progressions of half cadences whose final numeral is V, converted with ms3.fifths2sd.
half_bass_notes = get_progressions(
    selected='HC',
    last_row=dict(numeral='V'),
    feature='bass_note',
)
half = half_bass_notes.map(ms3.fifths2sd)
print(f"Progressions for {len(half)} cadences:")
half_no_dups = half.map(remove_immediate_duplicates)
plot_progressions(half_no_dups, cut_at_stage=5)
Progressions for 880 cadences: